{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# LAB 06.01 - Clustering companies" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "!wget --no-cache -O init.py -q https://raw.githubusercontent.com/rramosp/ai4eng.v1/main/content/init.py\n", "import init; init.init(force_download=False); init.get_weblink()\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from local.lib.rlxmoocapi import submit, session\n", "session.LoginSequence(endpoint=init.endpoint, course_id=init.course_id, lab_id=\"L06.01\", varname=\"student\");" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataset\n", "\n", "Observe the following dataset, which contains daily stock movements for different companies (one row per company, one column per date)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from IPython.display import Image\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(60, 963)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "d = pd.read_csv(\"local/data/company-stock-movements-2010-2015-incl.csv.gz\", index_col=0)\n", "d.shape\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 2010-01-04 | \n", "2010-01-05 | \n", "2010-01-06 | \n", "2010-01-07 | \n", "2010-01-08 | \n", "2010-01-11 | \n", "2010-01-12 | \n", "2010-01-13 | \n", "2010-01-14 | \n", "2010-01-15 | \n", "... | \n", "2013-10-16 | \n", "2013-10-17 | \n", "2013-10-18 | \n", "2013-10-21 | \n", "2013-10-22 | \n", "2013-10-23 | \n", "2013-10-24 | \n", "2013-10-25 | \n", "2013-10-28 | \n", "2013-10-29 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Apple | \n", "0.580000 | \n", "-0.220005 | \n", "-3.409998 | \n", "-1.170000 | \n", "1.680011 | \n", "-2.689994 | \n", "-1.469994 | \n", "2.779997 | \n", "-0.680003 | \n", "-4.999995 | \n", "... | \n", "0.320008 | \n", "4.519997 | \n", "2.899987 | \n", "9.590019 | \n", "-6.540016 | \n", "5.959976 | \n", "6.910011 | \n", "-5.359962 | \n", "0.840019 | \n", "-19.589981 | \n", "
AIG | \n", "-0.640002 | \n", "-0.650000 | \n", "-0.210001 | \n", "-0.420000 | \n", "0.710001 | \n", "-0.200001 | \n", "-1.130001 | \n", "0.069999 | \n", "-0.119999 | \n", "-0.500000 | \n", "... | \n", "0.919998 | \n", "0.709999 | \n", "0.119999 | \n", "-0.480000 | \n", "0.010002 | \n", "-0.279998 | \n", "-0.190003 | \n", "-0.040001 | \n", "-0.400002 | \n", "0.660000 | \n", "
Amazon | \n", "-2.350006 | \n", "1.260009 | \n", "-2.350006 | \n", "-2.009995 | \n", "2.960006 | \n", "-2.309997 | \n", "-1.640007 | \n", "1.209999 | \n", "-1.790001 | \n", "-2.039994 | \n", "... | \n", "2.109985 | \n", "3.699982 | \n", "9.570008 | \n", "-3.450013 | \n", "4.820008 | \n", "-4.079986 | \n", "2.579986 | \n", "4.790009 | \n", "-1.760009 | \n", "3.740021 | \n", "
American express | \n", "0.109997 | \n", "0.000000 | \n", "0.260002 | \n", "0.720002 | \n", "0.190003 | \n", "-0.270001 | \n", "0.750000 | \n", "0.300004 | \n", "0.639999 | \n", "-0.130001 | \n", "... | \n", "0.680001 | \n", "2.290001 | \n", "0.409996 | \n", "-0.069999 | \n", "0.100006 | \n", "0.069999 | \n", "0.130005 | \n", "1.849999 | \n", "0.040001 | \n", "0.540001 | \n", "
Boeing | \n", "0.459999 | \n", "1.770000 | \n", "1.549999 | \n", "2.690003 | \n", "0.059997 | \n", "-1.080002 | \n", "0.360000 | \n", "0.549999 | \n", "0.530002 | \n", "-0.709999 | \n", "... | \n", "1.559997 | \n", "2.480003 | \n", "0.019997 | \n", "-1.220001 | \n", "0.480003 | \n", "3.020004 | \n", "-0.029999 | \n", "1.940002 | \n", "1.130005 | \n", "0.309998 | \n", "
5 rows × 963 columns
\n", "